** Importing results from Machine Learning 
clear
import delimited "$maindir/Results/Model_Outputs/CV_predictpre_best.csv", varnames(1) case(preserve)

rename pred_cv cvpreds
rename pred_best pred_full

replace cvpreds = "" if cvpreds=="NA"
destring cvpreds, replace

gen meterend = mdy(end_month, end_day, end_year)
format meterend %d

drop end_day end_month end_year

**** prediction errors
gen cvresids = total_mmbtu - cvpreds
gen cverrors = 100*cvresids/total_mmbtu

gen inresids_full = total_mmbtu - pred_full
gen inerrors_full = 100*inresids_full/total_mmbtu

keep Household meterend total_mmbtu kfold-inerrors_full


**************** merging with bootstrapped predictions
tempfile mlpredictions
save `mlpredictions'
** Importing results from bootstrapping
forval i = 1/200 {
	clear
	import delimited "$maindir/Results/Model_Outputs/predpre_ihwap_boots`i'.csv", varnames(1) case(preserve)
	
	rename pred_best pred_boots`i'
	drop total_mmbtu

	gen meterend = mdy(end_month, end_day, end_year)
	format meterend %d
	drop end_day end_month end_year
	
	duplicates tag Household meterend, gen(tagdups)
	gen weight_boots`i' = tagdups+1
	drop tagdups
	duplicates drop
	
	merge 1:1 Household meterend using `mlpredictions', nogen
	
	**** prediction errors
	gen resids_boots`i' = total_mmbtu - pred_boots`i'
	gen errors_boots`i' = 100*resids_boots`i'/total_mmbtu
	order pred_boots`i' resids_boots`i' errors_boots`i', after(weight_boots`i')
	
	tempfile mlpredictions
	save `mlpredictions'
}


**** finally combine all bootstrap and ML predictions with original IHWAP data
merge 1:1 Household meterend using "$maindir/Data/ihwap_anonym.dta", nogen

** log transform energy usage variables
forval i = 1/200 {
gen log_pred_boots`i' = log(pred_boots`i')
}

gen log_total_mmbtu = log(total_mmbtu)
gen log_electric_mmbtu = log(electric_mmbtu)
/* gas usage alone might have a lot of informative zeros (during summer)
, so we add 0.1 to gas usage before taking the log */
gen log_gas_mmbtu = log(gas_mmbtu+0.1)

** some date auxiliary variables
gen aux1 = AuditDate - meterend
gen aux2 = meterend - FinalDate
gen twoyears_prepost=1 if aux1>0 & aux1<=730
replace twoyears_prepost=1 if aux2>0 & aux2<=730

* normalizing months since treatment
gen mst = ""
forval i = 30(30)360 {
local ival = `i'/30
replace mst = "`ival'" if aux2>=`i' & aux2<`i' + 30
}
forval i = 0(30)360 {
local ival = `i'/30 + 1
replace mst = "-`ival'" if aux1>=`i' & aux1<`i' + 30
}
drop aux1 aux2

gen counter = 1
gen months_since_treat = .
forval i = -12(1)-1 {
replace months_since_treat = counter if mst=="`i'"
replace counter = counter+1
}
forval i = 1(1)12 {
replace months_since_treat = counter if mst=="`i'"
replace counter = counter+1
}
labmask months_since_treat, values(mst)
drop counter

* restrict sample to monthly observations
gen meterdays_monthly = meterdays
replace meterdays_monthly = . if meterdays<25 | meterdays>36

label define treat 0 "Not treated" 1 "WAP Treatment"
label values treated treat

**** machine learning savings
gen tau_ml = total_mmbtu - pred_full
winsor2 tau_ml, cuts(0.5 99.5) trim replace

forval i = 1/200 {
gen tau`i' = total_mmbtu - pred_boots`i'
winsor2 tau`i', cuts(0.5 99.5) trim replace
}

* in percent
gen pct_tau_ml = (total_mmbtu - pred_full)/pred_full
winsor2 pct_tau_ml, cuts(0.5 99.5) trim replace

forval i = 1/200 {
gen pct_tau`i' = (total_mmbtu - pred_boots`i')/pred_boots`i'
winsor2 pct_tau`i', cuts(0.5 99.5) trim replace
}


****** merge with some missing information from State files
tempfile ihwapboots
save `ihwapboots'

clear
use "$maindir/Data/ihwap_state.dta"

keep if BuildingType=="Single Family"
keep if MainHeatFuel=="1 - Natural Gas"

* binned dollars spent
foreach x in Real_tot_actAirSeal Real_tot_actBaseload Real_tot_actDoor Real_tot_actHealSfty Real_tot_actWtHtr {
	gen bin`x' = .
	forval i = 0(100)900 {
		replace bin`x' = `i' if `x'>`i'-100 & `x'<=`i'
	}
	replace bin`x' = 1000 if `x'>900 & `x'!=.
}
foreach x in Real_tot_actFoundation Real_tot_actGeneral Real_tot_actWindow {
	gen bin`x' = .
	forval i = 0(200)2000 {
		replace bin`x' = `i' if `x'>`i'-200 & `x'<=`i'
	}
	replace bin`x' = 2200 if `x'>2000 & `x'!=.
}
foreach x in Real_tot_actAttic Real_tot_actWallIns {
	gen bin`x' = .
	forval i = 0(300)2700 {
		replace bin`x' = `i' if `x'>`i'-300 & `x'<=`i'
	}
	replace bin`x' = 3000 if `x'>2700 & `x'!=.
}
foreach x in Real_oth_cost Real_tot_actFurnace {
	gen bin`x' = .
	forval i = 0(300)3000 {
		replace bin`x' = `i' if `x'>`i'-300 & `x'<=`i'
	}
	replace bin`x' = 3300 if `x'>3000 & `x'!=.
}
gen bin_TotalCost = .
forval i = 0(500)9000 {
	replace bin_TotalCost = `i' if Real_total_cost>`i'-500 & Real_total_cost<=`i'
}
replace bin_TotalCost = 9500 if Real_total_cost>9000 & Real_total_cost!=.
replace bin_TotalCost = 500 if bin_TotalCost==0

foreach var of varlist binReal_tot_actAirSeal-binReal_tot_actWallIns {
	replace `var' = 1 if `var'==0
}
foreach x in Real_tot_actAirCon {
	gen bin`x' = .
	forval i = 0(100)500 {
		replace bin`x' = `i' if `x'>`i'-100 & `x'<=`i'
	}
	replace bin`x' = 600 if `x'>500 & `x'!=.
	replace bin`x' = 1 if bin`x'==0
}

drop binReal_oth_cost

foreach x in Attic AirCon AirSeal Baseload Door HealSfty WtHtr Foundation General Window Furnace WallIns {
rename binReal_tot_act`x' bin`x'
}

*** merge back with energy data
merge 1:m Household using `ihwapboots', nogen

* Separating usage pre and post WAP, and creating artificial treatment variable
reghdfe log_total_mmbtu i.treated c.HDD60 c.HDD60#c.HDD60 c.CDD75 c.CDD75#c.CDD75 c.precip c.tmax c.tmin if twoyears_prepost==1 & meterdays_monthly!=., absorb(i.Household i.month_year) vce(cluster Household end_month)
gen regsample = e(sample) /* to keep stable sample across regressions */

replace regsample = 2 if regsample==0
sort Household regsample
by Household : gen job_obs = _n
gen ww_treat = 0 if job_obs==1 & regsample==1
replace ww_treat = 1 if job_obs==2 & regsample==1
label values ww_treat treat

** savings measures
gen projected_savings = (ProjectedConsumption - ExistingConsumption)/ExistingConsumption
replace projected_savings = 0 if treated==0

gen realized_savings = tau_ml/pred_full if treated==1
replace realized_savings = cvresids/cvpreds if treated==0

gen savings_gap = realized_savings - projected_savings
winsor2 savings_gap, replace trim cuts(0.5 99.5)

**** bootstrap loop for savings gap
forval i = 1/200 {
gen realized_savings`i' = tau`i'/pred_boots`i'
gen savings_gap`i' = realized_savings`i' - projected_savings
winsor2 savings_gap`i', replace trim cuts(0.5 99.5)
}

*** demographic bundles
sum Real_income1000 if ww_treat==1 , detail
gen income_level = 1 if Real_income1000<r(p25)
replace income_level = 2 if Real_income1000>=r(p25) & Real_income1000<r(p75)
replace income_level = 3 if Real_income1000>=r(p75) & Real_income1000!=.

sum Age if ww_treat==1, detail
gen age_level = 1 if Age<r(p25)
replace age_level = 2 if Age>=r(p25) & Age<r(p75)
replace age_level = 3 if Age>=r(p75) & Age!=.

gen hhsize = 1 if noccupants==1
replace hhsize = 2 if noccupants>=2 & noccupants<=3
replace hhsize = 3 if noccupants>=4 & noccupants!=.

* grouping demographics
egen group_demographics = group(income_level age_level hhsize)


***** bundles of amount spent in the 4 major categories
foreach x in AirSeal Attic Furnace WallIns {
gen `x'_cat = 1 if tot_act`x'==0
}

replace Furnace_cat = 0 if tot_actFurnace>900 & tot_actFurnace<=1800
replace Furnace_cat = 2 if tot_actFurnace>0 & tot_actFurnace<=900
replace Furnace_cat = 3 if tot_actFurnace>1800

replace Attic_cat = 0 if tot_actAttic>600 & tot_actAttic<=1600
replace Attic_cat = 2 if tot_actAttic>0 & tot_actAttic<=600
replace Attic_cat = 3 if tot_actAttic>1600

replace AirSeal_cat = 0 if tot_actAirSeal>200 & tot_actAirSeal<=500
replace AirSeal_cat = 2 if tot_actAirSeal>0 & tot_actAirSeal<=200
replace AirSeal_cat = 3 if tot_actAirSeal>500

replace WallIns_cat = 0 if tot_actWallIns>900 & tot_actWallIns<=1500
replace WallIns_cat = 2 if tot_actWallIns>0 & tot_actWallIns<=900
replace WallIns_cat = 3 if tot_actWallIns>1500

egen spending_cat = group(AirSeal_cat Attic_cat Furnace_cat WallIns_cat)
replace spending_cat = 0 if AirSeal_cat==0 & Attic_cat==0 & Furnace_cat==0 & WallIns_cat==0

label define airseallab 0 "Med. AirSeal" 1 "Zero AirSeal" 2 "Low AirSeal" 3 "High AirSeal"
label define atticlab 0 "Med. Attic" 1 "Zero Attic" 2 "Low Attic" 3 "High Attic"
label define furnacelab 0 "Med. Furnace" 1 "Zero Furnace" 2 "Low Furnace" 3 "High Furnace"
label define wallinslab 0 "Med. WallIns" 1 "Zero WallIns" 2 "Low WallIns" 3 "High WallIns"

label values AirSeal_cat airseallab
label values Attic_cat atticlab
label values Furnace_cat furnacelab
label values WallIns_cat wallinslab


******** merging with information about PRISM engineering model
* NOTE: raw data for home-specific PRISM analyses are not provided due to confidentiality concerns
* raw data and code for this portion can be provided upon request
tempfile ihwapwedge
save `ihwapwedge'

clear
use "$maindir/Results/Model_Outputs/prism_gas.dta"

keep Household max HDDbest treated
duplicates drop

merge 1:m Household treated using `ihwapwedge', gen(prismmerge)
drop if prismmerge==1
drop prismmerge

sort Household meterend
label data ""
compress


****** save, splitting into 4 parts to circumvent file size limits from Dataverse
tempfile ihwapwedge
save `ihwapwedge'

keep if _n <= 190774
save "$maindir/Results/Model_Outputs/ihwap_wedge1.dta", replace

clear
use `ihwapwedge'

keep if _n > 190774 & _n <= 381548
save "$maindir/Results/Model_Outputs/ihwap_wedge2.dta", replace

clear
use `ihwapwedge'

keep if _n > 381548 & _n <= 572322
save "$maindir/Results/Model_Outputs/ihwap_wedge3.dta", replace

clear
use `ihwapwedge'

keep if _n > 572322 
save "$maindir/Results/Model_Outputs/ihwap_wedge4.dta", replace

clear
use `ihwapwedge'
